################################################################################################################
##########  Description: Application 2 — number of homicides in the state of Amapá (Brazil) by color/race, 2002 to 2012
##########  Source : Map of Violence 2014, p. 135
##########  Site : http://www.mapadaviolencia.org.br/pdf2014/Mapa2014_JovensBrasil_Preliminar.pdf   
################################
################################################################################

# Load the homicide-by-race data (columns used below: hom, raca, ano).
setwd("C:...")   # placeholder path — point this at the directory holding the data file
dados <- read.table("homicdio-raa.txt",header=T)

attach(dados)   # NOTE(review): attach() is discouraged, but later code relies on the attached names `ano`, `hom`, `raca`
y      <- dados$hom           # response: homicide counts
x1     <- factor(dados$raca)  # race as a factor

Xe     <- model.matrix(~ 1 + x1)  # design matrix: intercept + race dummy

#######################################################
## Figure: Number of homicides by race in Amapá (BR), 2002-2012
#######################################################


# Total homicides per (year, race) cell; Group.1 = year, Group.2 = race factor.
XX = aggregate(y, by = list(ano, x1), FUN = "sum")

plot(XX$x ~ XX$Group.1, col= XX$Group.2, xlab = "year", ylab = "number of homicides", pch = c(15, 17)[XX$Group.2], cex = 1.5, ylim = c(0,240), main = "Homicides in Amap by race, 2002-2012")

# Solid black line: first factor level (rows 1-11); dashed red line: second level (rows 12-22).
lines(XX$x[1:11]~XX$Group.1[1:11], lwd = 2)
lines(XX$x[12:22]~XX$Group.1[12:22], lwd = 2, col = "red" , lty = 2)

# NOTE(review): the legend pairs lty = 1 with the red entry, but the red series
# is drawn with lty = 2 above — verify the mapping against levels(x1).
legend("topleft", pch = c(17, 15), c("Black", "White"), lty = c(1, 2), lwd = 2, col = c("red", "black"))

################################################################################
###############################################
############################ Polya-Aeppli model 
###############################################
################################################################################


require(polyaAeppli)  # package of Plya-Aeppli distribution

#---------------------------------------
# likelihood function
#---------------------------------------

# Log-likelihood of the Polya-Aeppli regression model.
# theta1 = (beta0, beta1, logit(rho)); mean mu = exp(Xe %*% beta), and the
# distribution is parameterized so that lambda = mu * (1 - rho).
# Uses the globals Xe (design matrix) and y (counts).
f = function(theta1) {
  coefs     <- theta1[1:2]
  rho_hat   <- 1 / (1 + exp(-theta1[3]))  # inverse logit keeps rho in (0, 1)
  fitted_mu <- exp(Xe %*% coefs)
  sum(dPolyaAeppli(y, fitted_mu * (1 - rho_hat), rho_hat, log = TRUE))
}

theta0 = rep(0.1,3)
# Maximize the log-likelihood (fnscale = -1 turns optim's minimizer into a
# maximizer); hessian = TRUE returns the Hessian at the optimum on the
# working (logit-rho) scale.
m.IP   = optim(theta0, f, control = list(fnscale = -1, trace = FALSE, maxit = 10000),  method = "L-BFGS-B", hessian=TRUE)

# Information criteria (smaller is better); CAIC uses penalty (log(n) + 1) per parameter.
AIC    = -2 * m.IP$value + 2 * length(m.IP$par)
BIC    = -2 * m.IP$value + length(m.IP$par) * log(length(y))
CAIC = -2 * m.IP$value + length(m.IP$par)*(log(length(y)) + 1)
cbind(c("AIC","BIC", "CAIC"),round(c(AIC,BIC, CAIC), 3))

#----------------------------------------
# Estimates
#----------------------------------------

# Back-transform rho from the logit scale; the betas stay on the log-mean scale.
betae     = m.IP$par[1:2]
rho       =  1/(1+exp(-m.IP$par[3]))   

mu        = exp(Xe %*% betae)

# Estimates reported on the natural scale: (beta0, beta1, rho).
estimate  = c(betae,rho)
estimate

# Standard error

# Log-likelihood with rho on the NATURAL scale (no logit transform): the
# numerical Hessian below is evaluated at `estimate`, whose third component is
# already the back-transformed rho, so the resulting standard errors are on
# the natural scale.
f = function(theta1) {
  
  betae  = theta1[1:2]
  rho    = theta1[3]
 
  mu     = exp(Xe %*% betae)

  lambda = mu*(1-rho)	
 
  loglik   = dPolyaAeppli(y, lambda, rho, log = TRUE)

  sum(loglik)
}

library(nlme)
# Observed information = negative numerical Hessian at the MLE (nlme::fdHess).
obsinf  = -fdHess(estimate, f)$Hessian
covmat  = solve(obsinf)
setheta = sqrt(diag(covmat))
# Wald 95% confidence limits and z tests.
LInf = estimate - qnorm(0.975)*setheta
LSup = estimate + qnorm(0.975)*setheta
mest    = cbind(estimate, setheta,(estimate) / setheta,  round(2*(1-pnorm(abs(estimate) / setheta)), 3), LInf, LSup)
colnames(mest) = c("Estimate", "s.e.", "z value", "P(> |z|)", "2,5%", "97,5")

print(mest)


#############################################
########## non-parametric Bootstrap procedure
#############################################

B = 3000 # number of bootstrap samples generated
estB = matrix(0, B, 3)   # row i: (beta0, beta1, logit(rho)) for replicate i

# Nonparametric (case-resampling) bootstrap: each replicate refits the
# Polya-Aeppli regression to rows drawn with replacement.
for (i in 1:B){

# Indices of the resampled observations (rows of Xe / elements of y).
K <- sample(1:length(y), length(y), replace=TRUE )

#---------------------------------------
# likelihood function (closes over the current resample K)
# NOTE(review): redefining f inside the loop works, but it could be hoisted
# out with K passed as an argument.
#---------------------------------------

f = function(theta1) {
  
  betae  = theta1[1:2]
  rho    = 1/(1+exp(-theta1[3]))
 
  mu     = exp(Xe[K,] %*% betae)

  lambda = mu*(1-rho)	
 
  loglik = dPolyaAeppli(y[K], lambda, rho, log = TRUE)

  sum(loglik)
}

theta0 = rep(0.1,3)
m.IPB   = optim(theta0, f, control = list(fnscale = -1, trace = FALSE, maxit = 10000),  method = "L-BFGS-B")

# Store the optimizer-scale parameters for this replicate.
estB[i,] <- m.IPB$par

}

# Back-transform the bootstrapped third parameter from the logit scale to rho.
# BUG FIX: the original used 1/(1+exp(estB[,3])), i.e. the inverse logit with
# the sign flipped (it equals 1 - rho, not rho). Every other back-transform in
# this script — including the MLE itself — uses 1/(1+exp(-x)).
rhoB <- 1/(1+exp(-estB[,3]))

estB[,3] <- rhoB

# Bootstrap sampling distributions of the three estimators.
hist(estB[,1], main = expression(paste("Histogram of ", hat(beta)[0])), xlab = expression(hat(beta)[0]))

hist(estB[,2], main = expression(paste("Histogram of ", hat(beta)[1])), xlab = expression(hat(beta)[1]))

hist(estB[,3], main = expression(paste("Histogram of ", hat(rho))), xlab = expression(hat(rho)))

# Location summaries and bootstrap standard errors.
summary(estB[,1])
summary(estB[,2])
summary(estB[,3])

sd(estB[,1])
sd(estB[,2])
sd(estB[,3])

################
### BCa interval
################

### calculating z0_hat (bias-correction) for each estimator

# Bias correction z0 = Phi^{-1}( #{bootstrap estimates below the full-sample
# estimate} / B ), computed for each of the three parameters.
p1 <- numeric()
p1 <- which(estB[,1] < estimate[1])
length(p1)

p2 <- numeric()
p2 <- which(estB[,2] < estimate[2])
length(p2)

p3 <- numeric()
p3 <- which(estB[,3] < estimate[3])
length(p3)

z0_0 <- qnorm(length(p1)/B)
z0_1 <- qnorm(length(p2)/B)
z0_rho <- qnorm(length(p3)/B)

### calculating the a_0_hat  (acceleration) for each estimator


p = ncol(Xe)

# Leave-one-out (jackknife) fits for the BCa acceleration: column i of
# Delete_est holds the estimates without observation i — rows 1..p are the
# betas on the optimizer scale, row p+1 is rho back-transformed to (0,1).
# Delete_l stores the maximized log-likelihood of each deleted fit.
Delete_est = matrix(0,  p + 1, length(y))

Delete_l  = numeric()
rho_ss = numeric()

for(i in 1:length(y)){

# Log-likelihood of the Polya-Aeppli model with observation i removed.
f_s = function(theta1) {
  
   beta_s   = theta1[1:2]
   rho_s    = 1/(1+exp(-theta1[3]))

   mu_s     = exp(Xe[-i,] %*% beta_s)

   lambda_s = mu_s*(1-rho_s)	
 
   loglik   = dPolyaAeppli(y[-i], lambda_s, rho_s, log = TRUE)

  sum(loglik)
}

theta0 = rep(0.1,3)
m.IP_s = optim(theta0, f_s, control = list(fnscale = -1, maxit = 10000, trace = FALSE), method = "L-BFGS-B")

rho_ss[i] = 1/(1+exp(-m.IP_s$par[3]))  

for (j in 1:p){
  
Delete_est[j,i] = m.IP_s$par[j]

}

Delete_est[p+1,i] = rho_ss[i]
Delete_l[i] =  m.IP_s$value
}

# Acceleration: a = sum((mean - jack)^3) / (6 * [sum((mean - jack)^2)]^(3/2)),
# the standard jackknife skewness estimate used by BCa.
a0_0 <- sum((mean(Delete_est[1,])-Delete_est[1,])^3)/(6*(sum((mean(Delete_est[1,])-Delete_est[1,])^2))^(3/2))
a0_1 <- sum((mean(Delete_est[2,])-Delete_est[2,])^3)/(6*(sum((mean(Delete_est[2,])-Delete_est[2,])^2))^(3/2))
a0_rho <- sum((mean(Delete_est[3,])-Delete_est[3,])^3)/(6*(sum((mean(Delete_est[3,])-Delete_est[3,])^2))^(3/2))

#### Obtaining the bootstrap interval coverage levels

# BCa-adjusted percentile levels:
# alpha = Phi( z0 + (z0 + z_alpha) / (1 - a*(z0 + z_alpha)) ), at 2.5% and 97.5%.
alpha1_0 <- pnorm(z0_0 + (z0_0 + qnorm(0.05/2))/(1-a0_0*(z0_0 + qnorm(0.05/2))))
alpha2_0 <- pnorm(z0_0 + (z0_0 + qnorm(1-(0.05/2)))/(1-a0_0*(z0_0 + qnorm(1-(0.05/2)))))

alpha1_1 <- pnorm(z0_1 + (z0_1 + qnorm(0.05/2))/(1-a0_1*(z0_1 + qnorm(0.05/2))))
alpha2_1 <- pnorm(z0_1 + (z0_1 + qnorm(1-(0.05/2)))/(1-a0_1*(z0_1 + qnorm(1-(0.05/2)))))

alpha1_rho <- pnorm(z0_rho + (z0_rho + qnorm(0.05/2))/(1-a0_rho*(z0_rho + qnorm(0.05/2))))
alpha2_rho <- pnorm(z0_rho + (z0_rho + qnorm(1-(0.05/2)))/(1-a0_rho*(z0_rho + qnorm(1-(0.05/2)))))


####### Final statistics

# Final table: bootstrap mean, bootstrap s.e., and BCa limits per parameter.
Stat <- matrix(0, 3, 4)

#### beta_0

Stat[1,1] <- mean(estB[,1])
Stat[1, 2] <- sd(estB[,1])

# BCa interval = bootstrap quantiles at the adjusted levels alpha1/alpha2.
L_inf0 = quantile(estB[,1], prob = alpha1_0)
L_sup0 = quantile(estB[,1], prob = alpha2_0)

Stat[1, 3] = L_inf0
Stat[1, 4] = L_sup0

#### beta_1

Stat[2,1] <- mean(estB[,2])
Stat[2, 2] <- sd(estB[,2])

L_inf1 = quantile(estB[,2], prob = alpha1_1)
L_sup1 = quantile(estB[,2], prob = alpha2_1)

Stat[2, 3] = L_inf1
Stat[2, 4] = L_sup1

#### rho (third column of estB; the original comment said beta_2)

Stat[3,1] <- mean(estB[,3])
Stat[3, 2] <- sd(estB[,3])

L_infrho = quantile(estB[,3], prob = alpha1_rho)
L_suprho = quantile(estB[,3], prob = alpha2_rho)

Stat[3, 3] = L_infrho
Stat[3, 4] = L_suprho

colnames(Stat) = c("Estimate", "s.e.", "L_inf", "L_sup")
rownames(Stat) = c(expression(hat(beta)[0]), expression(hat(beta)[1]), expression(hat(rho)))
Stat

####################################################
####### Envelope: Pólya-Aeppli regression model (log link)
####################################################

par(mfrow=c(1,1))

X     = Xe
n     = nrow(X)
p     = ncol(X)

# Pearson-type residuals of the fitted model: for the Polya-Aeppli with mean
# mu, the variance is mu*(1+rho)/(1-rho).
td    = (y-mu)/sqrt(mu*(1+rho)/(1-rho))
td    = sort(td)

mm    = 200   # number of simulated envelope samples

nresp = matrix(0,n,mm)

###---------------
### Generating mm samples of size n from the fitted model (parametric bootstrap)

for(i in 1:mm){
nresp[,i] = rPolyaAeppli(n, mu*(1-rho), rho)
}

###---------------
### Refitting the model to each of the mm simulated samples

beta_ss = matrix(0, p, mm)
rho_ss  = numeric()   # NOTE(review): reuses the name rho_ss from the jackknife section

for(i in 1:mm){

# Log-likelihood for simulated response i (same design matrix Xe).
f_s = function(theta1) {
  
   beta_s   = theta1[1:2]
   rho_s    = 1/(1+exp(-theta1[3])) 

   mu_s     = exp(Xe %*% beta_s)

   lambda_s = mu_s*(1-rho_s)	
 
   loglik   = dPolyaAeppli(nresp[,i], lambda_s, rho_s, log = TRUE)

  sum(loglik)
}

# Warm start from the observed-data MLE.
theta0 = m.IP$par
m.IP_s = optim(theta0, f_s, control = list(fnscale = -1, maxit = 10000, trace = FALSE), method = "L-BFGS-B")

rho_ss[i] = 1/(1+exp(-m.IP_s$par[3]))  

for (j in 1:p){
  
  beta_ss[j,i] = m.IP_s$par[j]

 }
}

mu_ss = matrix(0,n,mm)

for(i in 1:mm){
  mu_ss[,i] = exp(Xe %*% beta_ss[, i])
}

# Residuals of each simulated fit, standardized the same way as td.
resid = matrix(0,n,mm)

for(i in 1:mm){
  resid[,i]  = (nresp[,i]-mu_ss[,i])/sqrt(mu_ss[,i]*(1+rho_ss[i])/(1-rho_ss[i]))
}

order_resid = matrix(0,n,mm)

for(i in 1:mm){
  order_resid[,i] = sort(resid[,i])
}

# Envelope bands: min/max/mean of each order statistic across the mm samples.
min_res = numeric()
max_res = numeric()
mean_res = numeric()

for(i in 1:n){
  min_res[i]  = min(order_resid[i,])
  max_res[i]  = max(order_resid[i,])
  mean_res[i] = mean(order_resid[i,])
}

faixa = range(td,min_res, max_res)

# Overlay observed residuals and envelope bands on a common normal Q-Q scale.
par(pty="s")
qqnorm(td,xlab="Percentil da N(0,1)", ylim=faixa,ylab="Pearson residue", pch=16, main=" Plya-Aeppli envelope")
par(new=T)
#
qqnorm(min_res,axes=F,xlab="",ylab="",type="l", lty=1, main="", ylim=faixa)
par(new=T)
qqnorm(max_res,axes=F,xlab="",ylab="", type="l",lty=1, main="", ylim=faixa)
par(new=T)
qqnorm(mean_res,axes=F,xlab="", ylab="", type="l", lty=2, main="", ylim=faixa)

###############################
#################### Diagnostic
###############################

# Leave-one-out refits used by the diagnostics below.
# NOTE(review): this rebuilds the same Delete_est / Delete_l already computed
# in the BCa section — the two loops are duplicated code.
p = ncol(Xe)

Delete_est = matrix(0,  p + 1, length(y))
Delete_l  = numeric()
rho_ss = numeric()

for(i in 1:length(y)){

# Log-likelihood with observation i deleted.
f_s = function(theta1) {
  
   beta_s   = theta1[1:2]
   rho_s    = 1/(1+exp(-theta1[3])) 

   mu_s     = exp(Xe[-i,] %*% beta_s)

   lambda_s = mu_s*(1-rho_s)	
 
   loglik   = dPolyaAeppli(y[-i], lambda_s, rho_s, log = TRUE)

  sum(loglik)
}

theta0 = rep(0.1,3)
m.IP_s = optim(theta0, f_s, control = list(fnscale = -1, maxit = 10000, trace = FALSE), method = "L-BFGS-B")

rho_ss[i] = 1/(1+exp(-m.IP_s$par[3]))  

for (j in 1:p){
  
Delete_est[j,i] = m.IP_s$par[j]

}

Delete_est[p+1,i] = rho_ss[i]
Delete_l[i] =  m.IP_s$value

}

#---------------------------------------
# Generalized Cooks distance
#---------------------------------------


# Generalized Cook's distance:
# GD_i = (theta_(i) - theta_hat)' I(theta_hat) (theta_(i) - theta_hat),
# with I(theta_hat) the observed information `obsinf` computed earlier.
GD_i = numeric()

for(i in 1:length(y)){
GD_i[i] = (Delete_est[,i]-estimate)%*%(obsinf)%*%(Delete_est[,i]-estimate)
}

plot(GD_i, ylab = expression(GD[i]), main ="Generalized Cooks distance")
identify(GD_i, n=4)   # interactive: click up to 4 points to label (needs an interactive device)

#---------------------------------------
# Likelihood distance
#---------------------------------------

# Likelihood distance LD_i = |2 * (l(theta_hat) - l(theta_hat_(i)))|:
# influence of case i on the maximized log-likelihood, using the deleted-case
# log-likelihoods in Delete_l.
# Vectorized — the original filled LD_i element-by-element in a loop; the
# whole-vector expression computes the identical values.
LD_i = abs(2*(m.IP$value - Delete_l))

plot(LD_i, ylab = expression(LD[i]), main="Likelihood distance")
identify(LD_i, n=3)   # interactive: click up to 3 points to label

###################################################################
####### Assessing the influence of observations on the estimates 
###################################################################

i = c(9, 7, 17,20,22)   # Observations to be excluded from the sample

# Log-likelihood of the Polya-Aeppli model with the influential cases removed.
f_si = function(theta1) {
  
   beta_s   = theta1[1:2]
   rho_s    = 1/(1+exp(-theta1[3])) 

   mu_s     = exp(Xe[-i,] %*% beta_s)

   lambda_s = mu_s*(1-rho_s)	
 
   loglik   = dPolyaAeppli(y[-i], lambda_s, rho_s, log = TRUE)

  sum(loglik)
}

theta0 = rep(0.1,3)
m.IP_si = optim(theta0, f_si, control = list(fnscale = -1, maxit = 10000, trace = FALSE), method = "L-BFGS-B")

#----------------------------------------
# Estimates (without the excluded observations)
#----------------------------------------

betae_i on the log-mean scale; rho_i back-transformed from the logit scale.
beta_i     = m.IP_si$par[1:2]
rho_i       =  1/(1+exp(-m.IP_si$par[3]))   

mu_i        = exp(Xe[-i,] %*% beta_i)

estimate_i  = c(beta_i,rho_i)

#----------------------------------------
# Standard error
#----------------------------------------

# Log-likelihood with rho on the NATURAL scale: fdHess is evaluated at
# estimate_i, whose third component is already the back-transformed rho, so
# the standard errors come out on the natural scale.
f_i = function(theta1) {
  
  betae  = theta1[1:2]
  rho    = theta1[3]
 
  mu     = exp(Xe[-i,] %*% betae)

  lambda = mu*(1-rho)	
 
  loglik   = dPolyaAeppli(y[-i], lambda, rho, log = TRUE)

  sum(loglik)
}

library(nlme)
# Observed information, covariance, s.e., Wald z tests and 95% CIs —
# same recipe as the full-sample fit.
obsinf_i  = -fdHess(estimate_i, f_i)$Hessian
covmat_i  = solve(obsinf_i)
setheta_i = sqrt(diag(covmat_i))
LInf_i = estimate_i - qnorm(0.975)*setheta_i
LSup_i = estimate_i + qnorm(0.975)*setheta_i
mest_i    = cbind(estimate_i, setheta_i,(estimate_i) / setheta_i, 2*(1-pnorm(abs(estimate_i) / setheta_i)), LInf_i, LSup_i)
colnames(mest_i) = c("Estimate", "s.e.", "z value", "P(> |z|)", "2,5%", "97,5")

print(mest_i)


# likelihood ratio statistics - Poisson (rho=0) vs Polya-Aeppli

# Likelihood ratio statistic - Poisson (rho = 0) vs Polya-Aeppli, both fitted
# without the excluded observations.
log.vero_PO = logLik(glm(y[-i]~x1[-i],family="poisson"))[1]
log.vero_PA = m.IP_si$value

LRS = -2*(log.vero_PO-log.vero_PA) 
LRS

# NOTE(review): rho = 0 lies on the boundary of the parameter space, so the
# chi-square(1) reference used here may be conservative — confirm whether a
# 50:50 chi-bar-square mixture null is more appropriate.
1-pchisq(LRS, 1)

################################################################################
###############################################
################################# Poisson model 
###############################################
################################################################################

Xe        = model.matrix(~ 1 + x1)
X         =  Xe
n         = nrow(X)
p         = ncol(X)

# Poisson GLM with log link (same linear predictor as the Polya-Aeppli model).
fit.model = glm(y ~ 1 + x1, family = poisson)
summary(fit.model)

AIC(fit.model)

BIC(fit.model)

# Consistent AIC: penalty (log(n) + 1) per parameter.
CAIC = -2*logLik(fit.model)[1] +  dim(Xe)[2]*(log(length(y))+ 1)
CAIC

####################################################
####### Envelope: Poisson regression model
####################################################

mu_po     = fitted(fit.model)
td_po     = resid(fit.model,type="pearson")

mm        = 200   # number of simulated envelope samples

nresp_po  = matrix(0,n,mm)

###---------------
### Generating mm samples of size n from the fitted Poisson model

for(i in 1:mm){
  nresp_po[,i] = rpois(n, mu_po)
}
  resid_po = matrix(0,n,mm)

for(i in 1:mm){

  # NOTE(review): `~ X` adds an intercept on top of X's own intercept column,
  # giving one aliased (NA) coefficient; fitted values and residuals are
  # unaffected, but `~ X - 1` would be cleaner.
  fit = glm(nresp_po[,i] ~ X, family=poisson)
  
  resid_po[,i]   = resid(fit,type="pearson")
	}
  order_resid_po = matrix(0,n,mm)

for(i in 1:mm){
  order_resid_po[,i] = sort(resid_po[,i])
}

# Envelope bands from the simulated order statistics.
min_res_po  = numeric()
max_res_po  = numeric()
mean_res_po = numeric()

for(i in 1:n){
  min_res_po[i]  = min(order_resid_po[i,])
  max_res_po[i]  = max(order_resid_po[i,])
  mean_res_po[i] = mean(order_resid_po[i,])
}

faixa = range(td_po,min_res_po, max_res_po)

qqnorm(td_po,xlab="Percentil da N(0,1)", ylim=faixa,ylab="Pearson residue", pch=16, main="Poisson envelope")
par(new=T)
#
qqnorm(min_res_po,axes=F,xlab="",ylab="",type="l", lty=1, main="", ylim=faixa)
par(new=T)
qqnorm(max_res_po,axes=F,xlab="",ylab="", type="l",lty=1, main="", ylim=faixa)
par(new=T)
qqnorm(mean_res_po,axes=F,xlab="", ylab="", type="l", lty=2, main="", ylim=faixa)


################################################################################
####################################################
############################ negative binomial model 
####################################################
################################################################################

require(MASS)   # NOTE(review): library(MASS) would fail fast; require() only warns if missing

Xe     = model.matrix(~ 1 + x1)
X      = Xe
n      = nrow(X)
p      = ncol(X)

# Negative binomial GLM (log link); theta is the NB dispersion parameter.
fit.model = glm.nb(y ~ 1 + x1)
summary(fit.model)

AIC(fit.model)

BIC(fit.model)

# CAIC counts the p regression coefficients plus the dispersion theta.
CAIC = -2*logLik(fit.model)[1] + (dim(Xe)[2]+1)*(log(length(y))+ 1)
CAIC

####################################################
####### Envelope: Binomial negative regression model
####################################################

mu_bn  = fitted(fit.model)
fi     = fit.model$theta   # estimated NB dispersion (size) parameter

td_bn  = resid(fit.model,type="pearson")

mm       = 200   # number of simulated envelope samples

nresp_bn = matrix(0,n,mm)

###---------------
### Generating mm samples of size n from the fitted negative binomial model

for(i in 1:mm){
  nresp_bn[,i] = rnegbin(n, mu_bn,fi)
}
  resid_bn = matrix(0,n,mm)

for(i in 1:mm){

  # NOTE(review): `~ X` duplicates X's intercept column (one aliased
  # coefficient); fitted values and residuals are unaffected.
  fit = glm.nb(nresp_bn[,i] ~ X, control = glm.control(maxit = 200))
  resid_bn[,i]  = resid(fit,type="pearson")
}

order_resid_bn  = matrix(0,n,mm)

for(i in 1:mm){
  order_resid_bn[,i] = sort(resid_bn[,i])
}

# Envelope bands from the simulated order statistics.
min_res_bn = numeric()
max_res_bn = numeric()
mean_res_bn = numeric()


for(i in 1:n){
min_res_bn[i]  = min(order_resid_bn[i,])
max_res_bn[i]  = max(order_resid_bn[i,])
mean_res_bn[i] = mean(order_resid_bn[i,])
}

faixa = range(td_bn,min_res_bn, max_res_bn)

qqnorm(td_bn,xlab="Percentil da N(0,1)", ylim=faixa,ylab="Pearson residue", pch=16, main="Negative binomial envelope")
par(new=T)
#
qqnorm(min_res_bn,axes=F,xlab="",ylab="",type="l", lty=1, main="", ylim=faixa)
par(new=T)
qqnorm(max_res_bn,axes=F,xlab="",ylab="", type="l",lty=1, main="", ylim=faixa)
par(new=T)
qqnorm(mean_res_bn,axes=F,xlab="", ylab="", type="l", lty=2, main="", ylim=faixa)

######################################################
######################################################
##### Predictive measures: Homicides by Race (Table 5). The following R code for the analyses of Table 5 was adapted from the Supplementary Materials of Czado, C., Gneiting, T., and Held, L. (2009).
##### Czado, C., Gneiting, T., and Held, L. (2009). Predictive model assessment for count data. Biometrics, 65, 1254-1261. Supplementary materials are available at http://www.biometrics.tibs.org. (Accessed on 10/10/2016).
######################################################
######################################################


### count regressions in cross validation mode 

n <- length(hom)   # `hom` and `raca` come from attach(dados) at the top of the script

### Poisson regression 

# Leave-one-out cross-validated predictive quantities:
#   Px  = P(Y <= y_i), Px1 = P(Y <= y_i - 1)   (for the PIT),
#   px  = P(Y = y_i)                            (predictive pmf at the observed count).
p.pois.lambda <- rep(0,n)
p.pois.Px <- rep(0,n) 
p.pois.Px1 <- rep(0,n)
p.pois.px <- rep(0,n) 

temp.po <- glm(hom ~ raca, family=poisson)
XX  <- model.matrix(temp.po)   # full-data design rows used for prediction

for (i in 1:n)
  {
  # Fit without case i, then predict case i from its design row.
  temp <- glm(hom[-i] ~ raca[-i], family=poisson)
  beta <- coef(temp)
  p.pois.lambda[i] <- exp(t(beta)%*%XX[i,])
  p.pois.Px[i] <- ppois(hom[i],p.pois.lambda[i])
  p.pois.Px1[i] <- ppois(hom[i]-1,p.pois.lambda[i])
  p.pois.px[i] <- dpois(hom[i],p.pois.lambda[i])
  }

### negative binomial regression 

# Leave-one-out CV for the negative binomial regression: mean (lambda) and
# dispersion (theta) refit without case i; Px/Px1/px as in the Poisson block.
p.nb.lambda <- rep(0,n)
p.nb.theta <- rep(0,n)
p.nb.Px <- rep(0,n)
p.nb.Px1 <- rep(0,n)
p.nb.px <- rep(0,n) 
 
library(MASS)    

temp.nb <- glm.nb(hom ~ raca)
XX  <- model.matrix(temp.nb)

for (i in 1:n)
  {
  temp <- glm.nb(hom[-i] ~ raca[-i])
  beta <- coef(temp)
  p.nb.lambda[i] <- exp(t(beta)%*%XX[i,])
  p.nb.theta[i] <- temp$theta
  p.nb.Px[i] <- pnbinom(hom[i],size=p.nb.theta[i],mu=p.nb.lambda[i])
  p.nb.Px1[i] <- pnbinom(hom[i]-1,size=p.nb.theta[i],mu=p.nb.lambda[i])
  p.nb.px[i] <- dnbinom(hom[i],size=p.nb.theta[i],mu=p.nb.lambda[i])
  }

### PA regression

# Leave-one-out CV for the Polya-Aeppli regression.
# NOTE(review): despite its name, p.pa.lambda stores the mean mu (the PA rate
# lambda = mu*(1-rho) is formed where needed below).
p.pa.lambda <- rep(0,n)
p.pa.rho <- rep(0,n)
p.pa.Px <- rep(0,n)
p.pa.px <- rep(0,n) 

 y = hom
 x1 <- factor(raca)
 
 Xe     = model.matrix(~ x1)

require(polyaAeppli)  # package for the Polya-Aeppli distribution

# NOTE(review): `m.IP` is reassigned inside this loop, clobbering the
# full-sample fit stored under the same name earlier in the script.
for (i in 1:n){

  	f = function(theta1) {
		betae  = theta1[1:dim(Xe)[2]]
  		rho    = 1/(1+exp(-theta1[dim(Xe)[2]+1]))
 		mu     = exp(Xe[-i,] %*% betae)
		lambda = mu*(1-rho)	
 		loglik = dPolyaAeppli(y[-i], lambda, rho, log = TRUE)
 	 	sum(loglik)
	}

theta0 = rep(0.1,dim(Xe)[2]+1)
m.IP   = optim(theta0, f, control = list(fnscale = -1, trace = FALSE, maxit = 10000),  method = "L-BFGS-B")

beta <- m.IP$par[1:dim(Xe)[2]]
p.pa.lambda[i] <- exp(Xe[i,] %*% beta)
p.pa.rho[i] <-  1/(1+exp(-m.IP$par[dim(Xe)[2]+1]))
# Predictive CDF and pmf at the held-out count (PA rate = mu*(1-rho)).
p.pa.Px[i] <- pPolyaAeppli(y[i], p.pa.lambda[i]*(1-p.pa.rho[i]), p.pa.rho[i])
p.pa.px[i] <- dPolyaAeppli(y[i], p.pa.lambda[i]*(1-p.pa.rho[i]), p.pa.rho[i])
}

### parameter settings for computing scores

kk <- 100000                            ### cut-off for the infinite summations 
my.k <- 0:kk                         ### support grid for the norms and ranked probability score

##################
### compute scores
##################

### Poisson regression 

# Proper scoring rules (Czado, Gneiting & Held, 2009): logarithmic, quadratic,
# spherical, ranked probability, Dawid-Sebastiani and squared error scores.
p.pois.logs <- - log(p.pois.px) 
  p.pois.norm <- 1:n
  for (i in 1:n) {p.pois.norm[i] <- sum(dpois(my.k,p.pois.lambda[i])^2)} 
p.pois.qs <- - 2*p.pois.px + p.pois.norm
p.pois.sphs <- - p.pois.px / sqrt(p.pois.norm)
p.pois.rps <- 1:n 
  for (i in 1:n) 
    {p.pois.rps[i] <- sum(ppois((-1):(hom[i]-1),p.pois.lambda[i])^2) + sum((ppois(hom[i]:kk,p.pois.lambda[i])-1)^2)}
# Dawid-Sebastiani uses the Poisson variance (= mean).
p.pois.dss <- (hom-p.pois.lambda)^2/p.pois.lambda + 2*log(sqrt(p.pois.lambda))
p.pois.ses <- (hom-p.pois.lambda)^2

### Negative binomial regression 

# NOTE(review): this recomputes p.nb.px from the CV lambdas/thetas, overwriting
# the (identical) values already filled in the CV loop.
p.nb.px <- dnbinom(hom,mu=p.nb.lambda,size=p.nb.theta)
p.nb.logs <- - log(p.nb.px)
  p.nb.norm <- 1:n
  for (i in 1:n) 
    {p.nb.norm[i] <- sum(dnbinom(my.k,mu=p.nb.lambda[i],size=p.nb.theta[i])^2)} 
p.nb.qs <- - 2*p.nb.px + p.nb.norm
p.nb.sphs <- - p.nb.px / sqrt(p.nb.norm)
p.nb.rps <- 1:n 
  for (i in 1:n) 
    {
    p.nb.rps[i] <- sum(pnbinom((-1):(hom[i]-1),mu=p.nb.lambda[i],size=p.nb.theta[i])^2) 
    p.nb.rps[i] <- p.nb.rps[i] + sum((pnbinom(hom[i]:kk,mu=p.nb.lambda[i],size=p.nb.theta[i])-1)^2)
    }
# Dawid-Sebastiani uses the NB variance mu*(1 + mu/theta).
p.nb.dss <- (hom-p.nb.lambda)^2/(p.nb.lambda*(1+p.nb.lambda/p.nb.theta)) + 2*log(sqrt(p.nb.lambda*(1+p.nb.lambda/p.nb.theta)))
p.nb.ses <- (hom-p.nb.lambda)^2

### Polya-Aeppli regression 

p.pa.px <- dPolyaAeppli(hom, p.pa.lambda*(1-p.pa.rho), p.pa.rho) 
p.pa.logs <- - log(p.pa.px)
  p.pa.norm <- 1:n
  for (i in 1:n) 
    {p.pa.norm[i] <- sum(dPolyaAeppli(my.k, p.pa.lambda[i]*(1-p.pa.rho[i]), p.pa.rho[i])^2)} 
p.pa.qs <- - 2*p.pa.px + p.pa.norm
p.pa.sphs <- - p.pa.px / sqrt(p.pa.norm)
p.pa.rps <- 1:n 
  for (i in 1:n) 
    {
    p.pa.rps[i] <- sum(pPolyaAeppli((-1):(hom[i]-1), p.pa.lambda[i]*(1-p.pa.rho[i]), p.pa.rho[i])^2) 
    p.pa.rps[i] <- p.pa.rps[i] + sum((pPolyaAeppli(hom[i]:kk, p.pa.lambda[i]*(1-p.pa.rho[i]), p.pa.rho[i])-1)^2)
    }
# Dawid-Sebastiani uses the PA variance mu*(1+rho)/(1-rho).
p.pa.dss <- (hom-p.pa.lambda)^2/(p.pa.lambda*((1+p.pa.rho)/(1-p.pa.rho))) + 2*log(sqrt(p.pa.lambda*((1+p.pa.rho)/(1-p.pa.rho))))
p.pa.ses <- (hom-p.pa.lambda)^2


### reproduce Table 5 column by column 

# Table 5: mean score per model (Poisson, NB, Polya-Aeppli); smaller is better.
round(c(mean(p.pois.logs), mean(p.nb.logs), mean(p.pa.logs)), 2)    ### logarithmic score
round(c(mean(p.pois.qs), mean(p.nb.qs), mean(p.pa.qs)), 2)        ### quadratic score
round(c(mean(p.pois.sphs),mean(p.nb.sphs), mean(p.pa.sphs)),2)    ### spherical score  
round(c(mean(p.pois.rps), mean(p.nb.rps), mean(p.pa.rps)),2)      ### ranked probability score
round(c(mean(p.pois.dss), mean(p.nb.dss), mean(p.pa.dss)),2)      ### Dawid-Sebastiani score
round(c(mean(p.pois.ses),mean(p.nb.ses), mean(p.pa.ses)), 2)      ### squared error score   






